In [450]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import brewer2mpl
import colorsys
import math
import dendropy as dp

from datetime import datetime
from Bio import AlignIO, SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Levenshtein import distance
from itertools import combinations, product, permutations
from time import time
from __future__ import division
from collections import Counter, defaultdict
from copy import deepcopy
from random import shuffle, choice, sample
from scipy.stats.mstats import mquantiles
from scipy.stats import norm, expon, poisson
from scipy.misc import comb
from IPython.display import Math
%matplotlib inline

In [451]:
segment = 8
supp = False

In [452]:
tree = dp.Tree.get_from_path('20141201 KY H3N8 Segment {0}.nexus'.format(segment), 'nexus')
tree


Out[452]:
<Tree object at 0x12071e810>

In [464]:
len(tree.taxon_set)


Out[464]:
319

In [453]:
# Y-tick, labels and xtick values
yticks = dict()
yticks[1] = np.arange(0, 16001, 4000)
yticks[2] = np.arange(0, 30001, 10000)
yticks[3] = np.arange(0, 16001, 4000)
yticks[4] = np.arange(0, 20001, 10000)
yticks[5] = np.arange(0, 30001, 10000)
yticks[6] = np.arange(0, 30001, 10000)
yticks[7] = np.arange(0, 30001, 10000)
yticks[8] = np.arange(0, 30001, 10000)

yticklabels = dict()
yticklabels[1] = np.arange(0, 17, 4)
yticklabels[2] = np.arange(0, 4, 1)
yticklabels[3] = np.arange(0, 17, 4)
yticklabels[4] = np.arange(0, 21, 10)
yticklabels[5] = np.arange(0, 31, 10)
yticklabels[6] = np.arange(0, 31, 10)
yticklabels[7] = np.arange(0, 31, 10)
yticklabels[8] = np.arange(0, 31, 10)

xticks = dict()
xticks[1] = np.arange(0, 121, 40)
xticks[2] = np.arange(0, 21, 10)
xticks[3] = np.arange(0, 21, 10)
xticks[4] = np.arange(0, 21, 10)
xticks[5] = np.arange(0, 21, 10)
xticks[6] = np.arange(0, 21, 10)
xticks[7] = np.arange(0, 21, 10)
xticks[8] = np.arange(0, 21, 10)

In [454]:
patristic_distances = dp.treecalc.PatristicDistanceMatrix(tree=tree).distances()
fig = plt.figure(figsize=(1.5,1.5))
ax = fig.add_subplot(111)
if segment == 1:
    bins = np.arange(0, 121, 10)
else:
    bins = np.arange(0, 21, 2)
ax.hist(patristic_distances, bins=bins)
ax.set_xlabel('Patristic Distance')
ax.set_ylabel('Counts (10$^3$)')
ax.set_title('All PDs'.format(segment))
ax.set_yticks(yticks[segment])
ax.set_yticklabels(yticklabels[segment])
ax.set_xticks(xticks[segment])
ax.annotate('d.', xy=(0,1), xycoords='figure fraction', ha='left', va='top')
plt.subplots_adjust(left=0.25, right=0.9, top=0.85, bottom=0.25)
plt.savefig('Segment {0} Patristic Distance Distribution.pdf'.format(segment))



In [455]:
# For supplementary figures.
patristic_distances = dp.treecalc.PatristicDistanceMatrix(tree=tree).distances()
fig = plt.figure(figsize=(1.5,1.5))
ax = fig.add_subplot(111)
if segment == 1:
    bins = np.arange(0, 121, 10)
else:
    bins = np.arange(0, 21, 2)
ax.hist(patristic_distances, bins=bins)
ax.set_xlabel('Patristic Distance')
ax.set_ylabel('Counts (10$^3$)')
ax.set_title('Segment {0}'.format(segment))
ax.set_yticks(yticks[segment])
ax.set_yticklabels(yticklabels[segment])
ax.set_xticks(xticks[segment])
# ax.annotate('d.', xy=(0,1), xycoords='figure fraction', ha='left', va='top')
plt.subplots_adjust(left=0.25, right=0.9, top=0.85, bottom=0.25)
plt.savefig('Segment {0} Patristic Distance Distribution Supplementary.pdf'.format(segment))



In [456]:
transmission_graph = nx.read_gpickle('Minto Flats.pkl')
transmission_graph.nodes(data=True)
for node in transmission_graph.nodes(data=True):
    if node[1]['subtype'] != 'H3N8':
        transmission_graph.remove_node(node[0])
        # transmission_graph.nodes(data=True)

In [457]:
len(transmission_graph.nodes())


Out[457]:
319

Because I don't have the taxa names identical between the tree and the transmission graph, therefore, I will use a dictionary mapping to map the taxa name in the graph to the taxa name in the tree.


In [458]:
taxadict = {str(t).split("'")[0].split("|")[0].replace("_", " ").replace('-', '/'):t for t in tree.taxon_set}
# taxadict

In [459]:
graph_pds = []
for (taxa1, taxa2) in transmission_graph.edges():
    # Note here that when exporting the taxa names, I mistakenly replaced '-' with '/', as such
    # names like blue-winged teal became blue/winged teal. Therefore, when reading in the isolate
    # names from the transmission graph, we have to replace the '-' character with '/'.
    taxa1 = taxa1.replace('-', '/')
    taxa2 = taxa2.replace('-', '/')
    t1 = taxadict[taxa1]
    t2 = taxadict[taxa2]
    pd = dp.treecalc.patristic_distance(tree, t1, t2)
    graph_pds.append(pd)

In [460]:
# Y-tick, labels and xtick values
yticks = dict()
yticks[1] = np.arange(0, 601, 200)
yticks[2] = np.arange(0, 601, 200)
yticks[3] = np.arange(0, 601, 200)
yticks[4] = np.arange(0, 601, 200)
yticks[5] = np.arange(0, 601, 200)
yticks[6] = np.arange(0, 601, 200)
yticks[7] = np.arange(0, 601, 200)
yticks[8] = np.arange(0, 601, 200)

yticklabels = dict()
yticklabels[1] = np.arange(0, 7, 2)
yticklabels[2] = np.arange(0, 7, 2)
yticklabels[3] = np.arange(0, 7, 2)
yticklabels[4] = np.arange(0, 7, 2)
yticklabels[5] = np.arange(0, 7, 2)
yticklabels[6] = np.arange(0, 7, 2)
yticklabels[7] = np.arange(0, 7, 2)
yticklabels[8] = np.arange(0, 7, 2)

xticks = dict()
xticks[1] = np.arange(0, 121, 40)
xticks[2] = np.arange(0, 21, 10)
xticks[3] = np.arange(0, 21, 10)
xticks[4] = np.arange(0, 21, 10)
xticks[5] = np.arange(0, 21, 10)
xticks[6] = np.arange(0, 21, 10)
xticks[7] = np.arange(0, 21, 10)
xticks[8] = np.arange(0, 21, 10)

In [461]:
# Make figure for patristic distance test comparison with 
fig = plt.figure(figsize=(1.5, 1.5))
ax = fig.add_subplot(111)
if segment == 1:
    bins = np.arange(0, 121, 10)
else:
    bins = np.arange(0, 21, 2)
ax.hist(graph_pds, color='r', bins=bins)
ax.set_xlabel('Patristic Distance')
ax.set_ylabel('Counts (10$^2$)')
ax.set_title('Network PDs'.format(segment))
ax.set_yticks(yticks[segment])
ax.set_yticklabels(yticklabels[segment])
ax.set_xticks(xticks[segment])
ax.annotate('e.', xy=(0,1), xycoords='figure fraction', ha='left', va='top')
plt.subplots_adjust(left=0.25, right=0.9, top=0.85, bottom=0.25)
plt.savefig('Segment {0} Network Patristic Distances.pdf'.format(segment))



In [462]:
# Make figure for patristic distance test panel in supplementary
# Make figure for patristic distance test comparison with 
fig = plt.figure(figsize=(1.5, 1.5))
ax = fig.add_subplot(111)
if segment == 1:
    bins = np.arange(0, 121, 10)
else:
    bins = np.arange(0, 21, 2)
ax.hist(graph_pds, color='r', bins=bins)
ax.set_xlabel('Patristic Distance')
ax.set_ylabel('Counts (10$^2$)')
ax.set_title('Segment {0}'.format(segment))
ax.set_yticks(yticks[segment])
ax.set_yticklabels(yticklabels[segment])
ax.set_xticks(xticks[segment])
# ax.annotate('e.', xy=(0,1), xycoords='figure fraction', ha='left', va='top')
plt.subplots_adjust(left=0.25, right=0.9, top=0.85, bottom=0.25)
plt.savefig('Segment {0} Network Patristic Distances Supplementary.pdf'.format(segment))



In [462]:


In [462]: